library(maps)
library(ggmap)
## Loading required package: ggplot2
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ✔ purrr   0.3.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ purrr::map()    masks maps::map()
library(nycflights13)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggmap':
## 
##     wind
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

The qplot Function

The function qplot() in the package ggplot2 is very similar to the basic plot() function from the R base package. It can be used to easily create and combine different types of plots, and it is great for producing plots quickly. However, it remains less flexible than the function ggplot(); I highly recommend using ggplot(), as it makes it easier to create complex graphics.

This lab provides a brief introduction to qplot(), which stands for quick plot.

tips <- read.csv("tips.csv")

summary(tips)
##       obs            totbill           tip             sex           
##  Min.   :  1.00   Min.   : 3.07   Min.   : 1.000   Length:244        
##  1st Qu.: 61.75   1st Qu.:13.35   1st Qu.: 2.000   Class :character  
##  Median :122.50   Median :17.80   Median : 2.900   Mode  :character  
##  Mean   :122.50   Mean   :19.79   Mean   : 2.998                     
##  3rd Qu.:183.25   3rd Qu.:24.13   3rd Qu.: 3.562                     
##  Max.   :244.00   Max.   :50.81   Max.   :10.000                     
##     smoker              day                time                size     
##  Length:244         Length:244         Length:244         Min.   :1.00  
##  Class :character   Class :character   Class :character   1st Qu.:2.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :2.00  
##                                                           Mean   :2.57  
##                                                           3rd Qu.:3.00  
##                                                           Max.   :6.00

Basic plot in R

plot(tips$totbill, tips$tip)

Example 1: Histogram

ggplot(tips, aes(x = tip)) +
  geom_histogram(binwidth = 0.2)

qplot() is a shortcut of ggplot():

qplot(x, y, data=, geom=, color=variable, fill=, shape=, size=, alpha=, facets=, xlab=, ylab=, main=,)

OR

qplot(variables, geometry, color, size, shape, facet, dataset)
qplot(x = tip, data = tips, geom = "histogram", binwidth = 0.2)

Add blue/green color to the histogram

qplot(x = tip, data = tips, geom = "histogram", fill = "green", binwidth = 0.2)

Apparently, the color is not filled in green. This is because aesthetic parameters in qplot() always try to map the aesthetic to a variable. We need to use I(value) to indicate a specific value.

qplot(x = tip, data = tips, geom = "histogram", fill = I("green"), binwidth = 0.2)

Set color according to the variable sex

qplot(x = tip, data = tips, geom = "histogram", fill = sex, binwidth = 0.2)

Example 2 Scatter plot

qplot(totbill, tip, data = tips, geom = "point", col = sex)

Note: qplot() gives a scatter plot by default.

qplot(totbill, tip, data = tips, col = sex)

Add more geom arguments by a vector of multiple geom names in turn:

qplot(x = totbill, y = tip, data = tips, geom = c("point", "smooth"), col = sex)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Add labels:

qplot(x = totbill, y = tip, data = tips, geom = c("point", "smooth"), col = sex, xlab = "Total Bill", ylab = "Tip", main = "Plot of Total Bill vs. Tip")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Example 3: Side-by-side box plot sex vs. tip

qplot(x = sex, y = tip, data = tips, geom = "boxplot", col = time, facets = ~smoker)

Example 4: For the mtcars data set, provide a qplot to see the relation between wt and mpg, set the color of points to cyl, give different lm smooth lines according to cyl.

# Scatter plot of wt vs. mpg coloured by cyl, with one smooth line per cyl group.
# NOTE(review): the "unknown parameters: method" warning printed below is
# presumably raised by the point layer, which has no `method` parameter; the
# smooth message no longer mentions loess, which suggests lm is applied - confirm.
qplot(x = wt, y = mpg, data = mtcars, geom = c("point", "smooth"), method = "lm", col = factor(cyl))
## Warning: Ignoring unknown parameters: method
## `geom_smooth()` using formula 'y ~ x'

The plotly Package

plotly is an R package for creating interactive web-based graphs.

There are two main ways to initiate a plotly object in R. The plot_ly() function transforms data into a plotly object, while the ggplotly() function transforms a ggplot object into a plotly object. Regardless of how a plotly object is created, printing it results in an interactive web-based visualization with tooltips, zooming, and panning enabled by default.

library(plotly)

There are two ways to make plotly graphs.

  1. ggplotly()
  2. plot_ly()

ggplotly() Function

Recall from our qplot example.

# Same kind of plot as the earlier qplot example, here with mpg on x and wt on y.
# NOTE(review): the `method` warning below presumably comes from the point
# layer only; the smooth layer still appears to use lm - confirm.
qplot(data = mtcars, x = mpg, y = wt, col = factor(cyl), geom = c("point", "smooth"), method = "lm")
## Warning: Ignoring unknown parameters: method
## `geom_smooth()` using formula 'y ~ x'

To create a ggplotly() graph, create the ggplot() first.

plot <- ggplot(mtcars, aes(x = mpg, y = wt, col = factor(cyl))) + 
    geom_point() + 
    geom_smooth(method = "lm")
ggplotly(plot)
## `geom_smooth()` using formula 'y ~ x'
library(gapminder)

glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country   <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year      <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp   <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop       <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …

ggplotly() will animate time (frame refers to animation)

p <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent, size = pop, frame = year, text = country, id = country)) + 
  geom_point() + 
  scale_x_log10()
ggplotly(p)

We can also change the animation part in the ggplotly() function.

  • transition: the duration of the smooth transition between frames (in milliseconds)

  • easing: the type of transition easing (linear makes the movement smooth, elastic will make the points “bounce” around)

  • mode: describes how a new animate call interacts with currently-running animations.

ggplotly(p) %>%
  animation_opts(transition = 500, easing = "linear", mode = "immediate")

We can also change the place of the Play button and other details on the graph.

ggplotly(p) %>% 
  animation_opts(1000, easing = "elastic", redraw = FALSE) %>% 
  animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom") %>%
  animation_slider(currentvalue = list(prefix = "YEAR ", font = list(color="red")))

plot_ly() Function

The plot_ly() function provides a direct interface to plotly.js, so anything in the figure reference can be specified via plot_ly().

A plotly visualization is composed of one (or more) trace(s), and every trace has a type. The default trace type, “scatter”, can be used to draw a large number of geometries along with the add_XX() functions.

The plot_ly() function has a number of arguments that make it easier to scale data values to visual aesthetics (e.g., color/colors, symbol/symbols, linetype/linetypes, size/sizes).

The syntax of plot_ly() is similar to that of qplot():

plot_ly(data, x, y, symbol, size,  type, mode, color)

type is type of graph/depiction

Histogram;

Add ~ to map the variable

plot_ly(data = tips, x = ~tip, type = "histogram")

Or we can use

plot_ly(tips, x = ~tip) %>%
  add_histogram()

The code above will produce the same graph, but split into several lines with add_XX()

To add time as a factor of color:

plot_ly(tips, x = ~tip, color = ~time) %>%
  add_histogram()
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

Bar plot

plot_ly(data = tips, x = ~day, type = "bar")

plot_ly() will not do the statistical transformation for us, so we need to do it ourselves.

tips %>%
  count(day) %>%
  plot_ly(x = ~day, y = ~n) %>%
  add_bars()

Note: Bar traces require bar heights (both x and y), whereas histogram traces require just a single variable.

Example:

# Produce a correlation table from the numeric columns of diamonds.
# select(where(...)) replaces the superseded select_if().
corr <- cor(dplyr::select(diamonds, where(is.numeric)))
corr
##            carat       depth      table      price           x           y
## carat 1.00000000  0.02822431  0.1816175  0.9215913  0.97509423  0.95172220
## depth 0.02822431  1.00000000 -0.2957785 -0.0106474 -0.02528925 -0.02934067
## table 0.18161755 -0.29577852  1.0000000  0.1271339  0.19534428  0.18376015
## price 0.92159130 -0.01064740  0.1271339  1.0000000  0.88443516  0.86542090
## x     0.97509423 -0.02528925  0.1953443  0.8844352  1.00000000  0.97470148
## y     0.95172220 -0.02934067  0.1837601  0.8654209  0.97470148  1.00000000
## z     0.95338738  0.09492388  0.1509287  0.8612494  0.97077180  0.95200572
##                z
## carat 0.95338738
## depth 0.09492388
## table 0.15092869
## price 0.86124944
## x     0.97077180
## y     0.95200572
## z     1.00000000
# Produce a correlation heat map
plot_ly() %>%
  add_heatmap(x = rownames(corr), y = colnames(corr), z = corr) %>%
  colorbar(limits = c(-1, 1))

Box plot

plot_ly(data = tips, y = ~tip, type = "box")
# First panel: a single box for all tips.
p1 <- add_boxplot(plot_ly(data = tips, y = ~tip))

# Second panel: tips split along x by `time`.
p2 <- add_boxplot(plot_ly(data = tips, y = ~tip, x = ~time))

# Combine the two panels into one figure.
subplot(p1, p2)
plot_ly(tips, y = ~tip, x = ~interaction(time, day)) %>%
  add_boxplot(color = ~time)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

Add more attributes to the boxplot

# Add points to the boxplot, pointpos moves where the points are and marker changes the color of the points
plot_ly(tips, x = ~tip, type = "box", boxpoints = "all", pointpos = 1, marker = list(color = "green")) %>%
  layout(title = "boxplot of tips") #adds a title to the boxplot

Scatter Plot

For two quantitative variables, plot_ly() defaults to a scatterplot, but you can also be explicit about adding a layer of markers/points via the add_markers() function.

plot_ly(data = tips, x = ~totbill, y = ~tip)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
plot_ly(data = tips, x = ~totbill, y = ~tip, text = ~day) %>%
  add_markers(color = ~sex)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

Different Modes in Scatter Plot

plot_ly(data = tips, x = ~totbill, y = ~tip, color = ~sex, text = ~day, mode = "markers")
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(data = tips, x = ~totbill, y = ~tip, color = ~sex, type = "scatter", text = ~day, mode = "markers+text")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(tips, x = ~totbill, y = ~tip, type = "scatter", mode = "text", text = ~day)
plot_ly(tips, x = ~totbill, y = ~tip, type = "scatter", mode = "text", text = ~paste("day:", day))

Add Comments

Example 1: For gapminder, select year at 2002, make a plotly object to visualize the relation between log(gdpPercap) and lifeExp, set color according to continent, size according to population, text according to country, mark the largest lifeExp with the text “Longest LifeExp” in the plot.

gapminder %>%
  filter(year == 2002) %>%
  plot_ly(x = ~log(gdpPercap), y = ~lifeExp, color = ~continent, size = ~pop, text = ~country) %>%
  add_markers() %>%
  slice(which.max(lifeExp)) %>%
  add_annotations(text = "Longest LifeExp")
## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

## Warning: `line.width` does not currently support multiple values.

Example 2: Mark the max and min mpg

plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
  slice(c(which.max(mpg), which.min(mpg))) %>%
  add_annotations(text = c("Max mpg", "Min mpg"))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

If we only want to point out the values at the maximum and minimum:

plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
  slice(c(which.max(mpg), which.min(mpg))) %>%
  add_annotations(text = ~mpg)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

If we want to point out the corresponding cyl value at the maximum and minimum mpg.

plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
  slice(c(which.max(mpg), which.min(mpg))) %>%
  add_annotations(text = ~factor(cyl))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Example 3: Mark each point with the values of cyl.

plot_ly(data = mtcars, x = ~wt, y = ~mpg, mode = "text", text = ~factor(cyl))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

Example 4: Mark only the points that have a cyl of 4 (the filter only affects the annotations in the plot, not the whole data set)

plot_ly(mtcars, x = ~wt, y = ~mpg, mode = "markers", color = ~factor(cyl)) %>%
  add_annotations(text = ~cyl, data = filter(mtcars, cyl == 4))
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter

Line Graph

Line graph is good for time series datasets to see trends.

Example 1: The airmiles data set

data("airmiles")
airmiles
## Time Series:
## Start = 1937 
## End = 1960 
## Frequency = 1 
##  [1]   412   480   683  1052  1385  1418  1634  2178  3362  5948  6109  5981
## [13]  6753  8003 10566 12528 14760 16769 19819 22362 25340 25343 29269 30514

Basic scatter plot

plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "markers")

Add lines to the plot

plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "markers+lines")
plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "lines")

OR

plot_ly(x = ~time(airmiles), y = ~airmiles) %>%
  add_lines()

Example 2: The txhousing data set

head(txhousing)
## # A tibble: 6 × 9
##   city     year month sales   volume median listings inventory  date
##   <chr>   <int> <int> <dbl>    <dbl>  <dbl>    <dbl>     <dbl> <dbl>
## 1 Abilene  2000     1    72  5380000  71400      701       6.3 2000 
## 2 Abilene  2000     2    98  6505000  58700      746       6.6 2000.
## 3 Abilene  2000     3   130  9285000  58100      784       6.8 2000.
## 4 Abilene  2000     4    98  9730000  68600      785       6.9 2000.
## 5 Abilene  2000     5   141 10590000  67300      794       6.8 2000.
## 6 Abilene  2000     6   156 13910000  66900      780       6.6 2000.
# One line per city. "lines" is the scatter mode for line traces; "line" is not
# a valid mode value.
plot_ly(txhousing, x = ~date, y = ~median, mode = "lines", color = ~city)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 616 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
plot_ly(txhousing, x=  ~date, y = ~median) %>%
  add_lines(color = ~city) %>%
  hide_legend()
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

When a data frame is associated with a plotly object, it allows us to manipulate the data underlying that object in the same way we would directly manipulate the data (dplyr)

We can manipulate data, express complex multi-layer plots, and chain all operations together via %>%. It is more readable and understandable.

txhousing %>%
  group_by(city) %>%
  plot_ly(x = ~date, y = ~median) %>%
  add_lines(name = "Texan cities", alpha = I(0.2)) %>%  #first trace: add one line per city
  filter(city == "Houston") %>%
  add_lines(name = "Houston", color = I("red")) # second trace

Sometimes the directed graph of a pipeline can be restrictive. (In this example, after filtering the data down to Houston, there is no way to recover the original data inside the pipeline.)

Solution 1:

txhousing %>%
  group_by(city) %>% 
  plot_ly(x = ~date, y = ~median) %>%
  # plots one line per city since p knows city is a grouping variable
  add_lines(alpha = I(0.2), name = "Texan Cities") %>%
  add_lines(name = "Houston", data = filter(txhousing, city == "Houston"), color = I("red")) %>%
  add_lines(name = "San Antonio", data = filter(txhousing, city == "San Antonio"), color = I("black"))

Solution 2:

The add_fun() function helps to work-around this restriction. It works by applying a function to the plotly object, but does not affect the data associated with the plotly object.

The add_fun() function accepts a function that takes a plot object as input, possibly applies a transformation to the data, and maps that data to visual objects.

# Add a line trace for a single city to a plot.
#   plot: the plot object to extend; its data must have a `city` column
#   name: the city to keep; also used as the name of the new trace
# Returns the result of add_lines() on the filtered plot.
layer_city <- function(plot, name) {
  # Keep only the rows belonging to the requested city
  city_only <- filter(plot, city == name)
  # Draw that city's line, labelled with the city name
  add_lines(city_only, name = name)
}
txhousing %>%
  group_by(city) %>%
  plot_ly(x=~date, y=~median) %>%
  add_lines(name="Texan cities", alpha=I(0.2)) %>%  # first trace: allcities
  add_fun(layer_city, "Houston") %>%     # map that data to plot object
  add_fun(layer_city, "San Antonio")

Extension: One more example for add_fun()

# Add a per-date summary layer to a plot: a black line through the median of
# the `median` column and a yellow ribbon spanning its first to third quartile.
#   plot: the plot object to extend; its data must have `date` and `median`
# Missing values are ignored when computing the three statistics.
layer_iqr <- function(plot) {
  by_date <- group_by(plot, date)
  quartiles <- summarise(
    by_date,
    q1 = quantile(median, 0.25, na.rm = TRUE),
    m = median(median, na.rm = TRUE),
    q3 = quantile(median, 0.75, na.rm = TRUE)
  )
  with_median <- add_lines(quartiles, y = ~m, name = "median", color = I("black"))
  add_ribbons(with_median, ymin = ~q1, ymax = ~q3, name = "IQR", color = I("yellow"))
}
txhousing %>%
  group_by(city) %>%
  plot_ly(x=~date, y=~median) %>%
  add_lines(name="Texan cities", alpha=I(0.2)) %>%  # first trace: allcities
  add_fun(layer_iqr) %>%
  add_fun(layer_city, "Houston") %>%
  add_fun(layer_city, "San Antonio")

Example 3: Provide a line graph to visualize the trend of unemployment rate over time in economics data set, and mark the date on the point with the highest unemployment rate.

economics %>%
  plot_ly(x = ~date, y = ~unemploy) %>%
  add_lines() %>%
  slice(which.max(unemploy)) %>%
  add_annotations(text = ~date)

Extension on plot_ly()

Creating Maps

The ggmap Package

The package makes it easy to retrieve raster map tiles from popular online mapping services like Stamen Maps and Google Maps, and plot them using the ggplot2 framework:

We can add the points (with longitude and latitude) to the map

Example 1: find the airports with the highest arrival delay in a map

  • Create the data set
data1 <- flights %>%
  group_by(dest) %>%
  summarize(average = mean(arr_delay, na.rm = TRUE)) %>%
  left_join(airports, by=c("dest" = "faa"))

head(data1)
## # A tibble: 6 × 9
##   dest  average name                          lat    lon   alt    tz dst   tzone
##   <chr>   <dbl> <chr>                       <dbl>  <dbl> <dbl> <dbl> <chr> <chr>
## 1 ABQ      4.38 Albuquerque International …  35.0 -107.   5355    -7 A     Amer…
## 2 ACK      4.85 Nantucket Mem                41.3  -70.1    48    -5 A     Amer…
## 3 ALB     14.4  Albany Intl                  42.7  -73.8   285    -5 A     Amer…
## 4 ANC     -2.5  Ted Stevens Anchorage Intl   61.2 -150.    152    -9 A     Amer…
## 5 ATL     11.3  Hartsfield Jackson Atlanta…  33.6  -84.4  1026    -5 A     Amer…
## 6 AUS      6.02 Austin Bergstrom Intl        30.2  -97.7   542    -6 A     Amer…
  • Create a base layer US map

Note: Syntax:

c(lowerleftlon, lowerleftlat, upperrightlon, upperrightlat)
us <- c(left = -125, bottom = 25.75, right = -67, top = 49)

Different map types:

maptype= c("terrain", "terrain-background", "terrain-labels", "terrain-lines",
  "toner", "toner-2010", "toner-2011", "toner-background", "toner-hybrid",
  "toner-labels", "toner-lines", "toner-lite", "watercolor") 
map <- get_stamenmap(us, zoom = 5, maptype = "toner-lite")
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
ggmap(map)  

  • Add points on the map
# Overlay one point per destination airport; colour and size both encode the
# average arrival delay. `text` is not a ggplot2 aesthetic (hence the warning
# below); presumably it is kept so ggplotly() can show the airport name.
map1 <- ggmap(map) + 
  geom_point(
    data = data1,
    aes(x = lon, y = lat, color = average, size = average, text = name),
    na.rm = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
  ) +
  scale_color_gradient(low = "green", high = "darkblue")
## Warning: Ignoring unknown aesthetics: text
ggplotly(map1)

Example 2: find the most popular airport in a map

  • Work with the data set
data2 <- flights %>%
  drop_na() %>%
  count(dest) %>%
  left_join(airports, by = c("dest" = "faa"))
  • Create the map
us <- c(left = -125, bottom = 25.75, right = -67, top = 49)
map <- get_stamenmap(us, zoom = 5, maptype = "toner-lite")
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
# Overlay one point per destination airport; colour and size both encode the
# number of flights. `text` is not a ggplot2 aesthetic (hence the warning
# below); presumably it is kept so ggplotly() can show the airport name.
map2 <- ggmap(map) + 
  geom_point(
    data = data2,
    aes(x = lon, y = lat, color = n, size = n, text = name),
    na.rm = TRUE  # spelled out: `T` is an ordinary variable and can be reassigned
  ) +
  scale_color_gradient(low = "blue", high = "red")
## Warning: Ignoring unknown aesthetics: text
ggplotly(map2)

The maps Package

It allows us to turn data from the maps into a data frame suitable for plotting with ggplot.

The structure of those data frames:

  • long: longitude.
  • lat: latitude.
  • order: shows in which order ggplot should “connect the dots”
  • region and subregion: tell what region or subregion a set of points surrounds.
  • group: This is very important! ggplot2’s functions can take a group argument which controls whether adjacent points should be connected by lines. If they are in the same group, then they get connected, but if they are in different groups then they don’t.

Plot the USA map: using geom_polygon().

geom_polygon() draws lines between points and “closes them up” (i.e. draws a line from the last point back to the first point). You have to map the group aesthetic to the group column.

Example 1: Get data set with States info to get a map

  • Get the State information
states <- map_data("state") 
  • Create the map
qplot(long, lat, data=states)

qplot(long, lat, data = map_data("world2"))

Example 2: We need to use group = group if we want to connect the map with lines

qplot(long, lat, data = states, geom = "path")

qplot(long, lat, data = states, geom = "path", group = group)

Example 3 Use geom = "polygon"

qplot(long, lat, data = states, geom = "polygon", group = group)

We can change the border color:

ggplot(states) +
  geom_polygon(aes(x = long, y = lat, group = group), color = "red")

We can also change the filled color

# Fill each polygon by longitude and draw red borders. A constant must be
# wrapped in I(); a bare "red" would be mapped as a variable rather than
# setting the border colour.
qplot(long, lat, data = states, geom = "polygon", group = group, fill = long, color = I("red"))

Example 4: coord_fixed() fixes the relationship between one unit in the y direction and one unit in the x direction. Here, every y unit is 1.3 times longer than an x unit, which makes the plot look good.

# Fill each state by its region name, with white borders and a fixed 1.3
# y-to-x ratio. The fill legend is switched off with "none", the replacement
# for the deprecated `guides(fill = FALSE)`.
ggplot(data = states) + 
  geom_polygon(aes(x = long, y = lat, fill = region, group = group), color = "white") + 
  coord_fixed(1.3) +
  guides(fill = "none")

Note: guides(fill = "none") (written guides(fill = FALSE) in older versions of ggplot2) turns off the fill legend.

Example 5: Create a map of the West Coast of the US.

west_coast <- subset(states, region %in% c("california", "oregon", "washington"))

ggplot(data = west_coast) + 
  geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black")

Apparently, the scale of this map is not correct. We can use coord_quickmap() to let R choose the suitable ratio for us.

ggplot(data = west_coast) + 
  geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") + 
  coord_quickmap()

Example 6: Get a state map (Use Georgia for example)

ga_df <- map_data("state")  %>%
  filter(region == "georgia")
ggplot(data = ga_df) + 
  geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black")

We can change the theme to make the map more distinctive.

ggplot(data = ga_df) + 
  geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") + 
  coord_quickmap() + 
  theme_void()

We can change the color.

ggplot(data = ga_df, mapping = aes(x = long, y = lat, group = group)) + 
  coord_quickmap() + 
  geom_polygon(color = "black", fill = "gray") + 
  theme_void()

Example 7: Get county map

state_df <- map_data("county") %>%
    filter(region == "georgia")
ggplot(data = ga_df, mapping = aes(x = long, y = lat, group = group)) + 
  coord_quickmap() + 
  geom_polygon(color = "black", fill = "gray") +  
  theme_void() + 
  geom_polygon(data = ga_df, aes(x = long, y = lat, group = group),fill = NA, color = "white") +
  geom_polygon(data = state_df, color = "black", fill = NA) 

Example 8: Mark the population of each county in Georgia

  • Load the external data set of population
GAdat <- read.csv("GAdat.csv")
  • Clean the external data set
# Lower-case the county names so they can be matched against `subregion`.
GAdat$County <- tolower(GAdat$County)

# Rename "de kalb" to "dekalb" in the county-level map data (`state_df`), the
# table that is joined below. The state outline `ga_df` is not part of the
# join, so renaming there would leave the county unmatched.
state_df$subregion <- replace(state_df$subregion, state_df$subregion == "de kalb", "dekalb")
mapdat <- left_join(GAdat, state_df, by = c("County" = "subregion"))
  • Create the map
# County polygons filled by Population (blue = low, red = high), with black
# county outlines drawn on top.
ggplot(mapdat, aes(x = long, y = lat, group = group)) +
  # The border colour is a constant, so it is set outside aes(); the former
  # aes(color = "yellow") mapping was overridden by it and had no effect.
  geom_polygon(aes(fill = Population), colour = alpha("red", 1/2)) +
  scale_fill_gradient(low = "blue", high = "red") +
  geom_polygon(data = state_df, colour = "black", fill = NA) +
  theme_void() +
  coord_quickmap()

Example 9: We can also play with colors:

# Same population map, using a reversed rainbow palette on a log10 fill scale.
ggplot(mapdat, aes(long, lat, group = group)) +
  # The border colour is a constant, so it is set outside aes(); the former
  # aes(color = "yellow") mapping was overridden by it and had no effect.
  geom_polygon(aes(fill = Population), colour = alpha("red", 1/2)) +
  geom_polygon(data = state_df, colour = "black", fill = NA) +
  theme_void() +
  coord_fixed(1.2) +
  scale_fill_gradientn(
    colours = rev(rainbow(7)),
    breaks = c(2, 4, 10, 100, 1000, 10000),
    trans = "log10"
  )

world <- map_data("world")

# World map filled by region with white borders. The fill legend is switched
# off with "none", the replacement for the deprecated `guides(fill = FALSE)`.
ggplot(data = world) + 
 geom_polygon(aes(x = long, y = lat, group = group, fill = region), color = "white") +
 coord_quickmap() +
 guides(fill = "none")